/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.csv;

import static org.apache.commons.csv.Token.Type.TOKEN;

import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.TreeMap;
import java.util.stream.Collectors;

/**
 * Parses CSV files according to the specified format.
 *
 * Because CSV appears in many different dialects, the parser supports many formats by allowing the
 * specification of a {@link CSVFormat}.
 *
 * The parser works record wise. It is not possible to go back, once a record has been parsed from the input stream.
 *
 * <h2>Creating instances</h2>
 * <p>
 * There are several static factory methods that can be used to create instances for various types of resources:
 * </p>
 * <ul>
 *     <li>{@link #parse(java.io.File, Charset, CSVFormat)}</li>
 *     <li>{@link #parse(String, CSVFormat)}</li>
 *     <li>{@link #parse(java.net.URL, java.nio.charset.Charset, CSVFormat)}</li>
 * </ul>
 * <p>
 * Alternatively parsers can also be created by passing a {@link Reader} directly to one of the constructors.
 *
 * For those who like fluent APIs, parsers can be created using {@link CSVFormat#parse(java.io.Reader)} as a shortcut:
 * </p>
 * <pre>
 * for(CSVRecord record : CSVFormat.EXCEL.parse(in)) {
 *     ...
 * }
 * </pre>
 *
 * <h2>Parsing record wise</h2>
 * <p>
 * To parse a CSV input from a file, you write:
 * </p>
 *
 * <pre>
 * File csvData = new File(&quot;/path/to/csv&quot;);
 * CSVParser parser = CSVParser.parse(csvData, Charset.defaultCharset(), CSVFormat.RFC4180);
 * for (CSVRecord csvRecord : parser) {
 *     ...
 * }
 * </pre>
 *
 * <p>
 * This will read and parse the contents of the file using the
 * <a href="http://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a> format.
 * </p>
 *
 * <p>
 * To parse CSV input in a format like Excel, you write:
 * </p>
 *
 * <pre>
 * CSVParser parser = CSVParser.parse(csvData, Charset.defaultCharset(), CSVFormat.EXCEL);
 * for (CSVRecord csvRecord : parser) {
 *     ...
 * }
 * </pre>
 *
 * <p>
 * If the predefined formats don't match the format at hand, custom formats can be defined. More information about
 * customising CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}.
 * </p>
 *
 * <h2>Parsing into memory</h2>
 * <p>
 * If parsing record wise is not desired, the contents of the input can be read completely into memory.
 * </p>
 *
 * <pre>
 * Reader in = new StringReader(&quot;a;b\nc;d&quot;);
 * CSVParser parser = new CSVParser(in, CSVFormat.EXCEL);
 * List&lt;CSVRecord&gt; list = parser.getRecords();
 * </pre>
 *
 * <p>
 * There are two constraints that have to be kept in mind:
 * </p>
 *
 * <ol>
 *     <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from
 *     the input, those records will not end up in the in memory representation of your CSV data.</li>
 *     <li>Parsing into memory may consume a lot of system resources depending on the input. For example if you're
 *     parsing a 150MB file of CSV data the contents will be read completely into memory.</li>
 * </ol>
 *
 * <h2>Notes</h2>
 * <p>
 * Internal parser state is completely covered by the format and the reader-state.
 * </p>
 *
 * @see <a href="package-summary.html">package documentation for more details</a>
 */
public final class CSVParser2 implements Iterable<CSVRecord>, Closeable {

	/**
	 * Iterator over the records of the enclosing parser.
	 *
	 * <p>
	 * Records are pulled lazily through {@code nextRecord()}. {@link #hasNext()} may fetch one record ahead and
	 * buffer it until {@link #next()} hands it out. Removal is not supported.
	 * </p>
	 */
	class CSVRecordIterator implements Iterator<CSVRecord> {
		/** Record fetched ahead by {@link #hasNext()} and not yet returned; {@code null} if nothing is buffered. */
		private CSVRecord current;

		/**
		 * Reads the next record from the enclosing parser.
		 *
		 * @return the next record, or {@code null} if the parser has no more records
		 * @throws IllegalStateException
		 *             wrapping any {@link IOException} raised while reading
		 */
		private CSVRecord getNextRecord() {
			try {
				return CSVParser.this.nextRecord();
			} catch (final IOException e) {
				final String message = e.getClass().getSimpleName() + " reading next record: " + e;
				throw new IllegalStateException(message, e);
			}
		}

		/**
		 * Tells whether another record is available.
		 *
		 * @return {@code false} if the parser is closed or exhausted, {@code true} otherwise
		 */
		@Override
		public boolean hasNext() {
			if (CSVParser.this.isClosed()) {
				return false;
			}
			if (current != null) {
				// a record is already buffered from an earlier call
				return true;
			}
			current = getNextRecord();
			return current != null;
		}

		/**
		 * Returns the buffered record if there is one, otherwise reads a fresh one.
		 *
		 * @return the next record, never {@code null}
		 * @throws NoSuchElementException
		 *             if the parser is closed or has no more records
		 */
		@Override
		public CSVRecord next() {
			if (CSVParser.this.isClosed()) {
				throw new NoSuchElementException("CSVParser has been closed");
			}
			final CSVRecord buffered = current;
			current = null;
			if (buffered != null) {
				return buffered;
			}

			// hasNext() was not called beforehand, so nothing is buffered yet
			final CSVRecord fetched = getNextRecord();
			if (fetched == null) {
				throw new NoSuchElementException("No more CSV records available");
			}
			return fetched;
		}

		/**
		 * Always fails: records cannot be removed from the input.
		 *
		 * @throws UnsupportedOperationException
		 *             always
		 */
		@Override
		public void remove() {
			throw new UnsupportedOperationException();
		}
	}

	/**
	 * Creates a parser for the given {@link File}.
	 *
	 * <p>
	 * The file is opened by this method. If the parser cannot be created (for example because reading the header
	 * fails or the header contains duplicate names), the file stream is closed again before the exception is
	 * propagated, so no file handle is leaked. On success the stream is owned by the returned parser; call
	 * {@link #close()} on the parser to release it.
	 * </p>
	 *
	 * @param file
	 *            a CSV file. Must not be null.
	 * @param charset
	 *            A Charset
	 * @param format
	 *            the CSVFormat used for CSV parsing. Must not be null.
	 * @return a new parser
	 * @throws IllegalArgumentException
	 *             If the parameters of the format are inconsistent or if either file or format are null.
	 * @throws IOException
	 *             If an I/O error occurs
	 */
	@SuppressWarnings("resource")
	public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException {
		Assertions.notNull(file, "file");
		Assertions.notNull(format, "format");
		final InputStream inputStream = new FileInputStream(file);
		try {
			return new CSVParser(new InputStreamReader(inputStream, charset), format);
		} catch (final IOException | RuntimeException e) {
			// The caller never receives a parser it could close, so release the file handle here.
			try {
				inputStream.close();
			} catch (final IOException closeFailure) {
				e.addSuppressed(closeFailure);
			}
			throw e;
		}
	}

	/**
	 * Creates a CSV parser that reads from the given {@link InputStream} using the given {@link CSVFormat}.
	 *
	 * <p>
	 * The stream is decoded with the given charset and handed to {@link #parse(Reader, CSVFormat)}. If you do not
	 * read all records from the given {@code inputStream}, you should call {@link #close()} on the parser, unless
	 * you close the {@code inputStream}.
	 * </p>
	 *
	 * @param inputStream
	 *            an InputStream containing CSV-formatted input. Must not be null.
	 * @param charset
	 *            a Charset.
	 * @param format
	 *            the CSVFormat used for CSV parsing. Must not be null.
	 * @return a new CSVParser configured with the given input stream and format.
	 * @throws IllegalArgumentException
	 *             If the parameters of the format are inconsistent or if either inputStream or format are null.
	 * @throws IOException
	 *             If there is a problem reading the header or skipping the first record
	 * @since 1.5
	 */
	@SuppressWarnings("resource")
	public static CSVParser parse(final InputStream inputStream, final Charset charset, final CSVFormat format)
			throws IOException {
		Assertions.notNull(inputStream, "inputStream");
		Assertions.notNull(format, "format");
		final Reader decoded = new InputStreamReader(inputStream, charset);
		return parse(decoded, format);
	}

	/**
	 * Creates a parser for the given {@link Path}.
	 *
	 * <p>
	 * The file is opened by this method. If the parser cannot be created (for example because reading the header
	 * fails or the header contains duplicate names), the stream is closed again before the exception is
	 * propagated, so no file handle is leaked. On success the stream is owned by the returned parser; call
	 * {@link #close()} on the parser to release it.
	 * </p>
	 *
	 * @param path
	 *            a CSV file. Must not be null.
	 * @param charset
	 *            A Charset
	 * @param format
	 *            the CSVFormat used for CSV parsing. Must not be null.
	 * @return a new parser
	 * @throws IllegalArgumentException
	 *             If the parameters of the format are inconsistent or if either path or format are null.
	 * @throws IOException
	 *             If an I/O error occurs
	 * @since 1.5
	 */
	public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException {
		Assertions.notNull(path, "path");
		Assertions.notNull(format, "format");
		final InputStream inputStream = Files.newInputStream(path);
		try {
			return parse(inputStream, charset, format);
		} catch (final IOException | RuntimeException e) {
			// The caller never receives a parser it could close, so release the file handle here.
			try {
				inputStream.close();
			} catch (final IOException closeFailure) {
				e.addSuppressed(closeFailure);
			}
			throw e;
		}
	}

	/**
	 * Creates a CSV parser that reads from the given {@link Reader} using the given {@link CSVFormat}.
	 *
	 * <p>
	 * This is a factory-method equivalent of the two-argument constructor. If you do not read all records from
	 * the given {@code reader}, you should call {@link #close()} on the parser, unless you close the
	 * {@code reader}.
	 * </p>
	 *
	 * @param reader
	 *            a Reader containing CSV-formatted input. Must not be null.
	 * @param format
	 *            the CSVFormat used for CSV parsing. Must not be null.
	 * @return a new CSVParser configured with the given reader and format.
	 * @throws IllegalArgumentException
	 *             If the parameters of the format are inconsistent or if either reader or format are null.
	 * @throws IOException
	 *             If there is a problem reading the header or skipping the first record
	 * @since 1.5
	 */
	public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException {
		final CSVParser parser = new CSVParser(reader, format);
		return parser;
	}

	/**
	 * Creates a parser that reads its CSV input from the given {@link String}.
	 *
	 * @param string
	 *            a CSV string. Must not be null.
	 * @param format
	 *            the CSVFormat used for CSV parsing. Must not be null.
	 * @return a new parser
	 * @throws IllegalArgumentException
	 *             If the parameters of the format are inconsistent or if either string or format are null.
	 * @throws IOException
	 *             If an I/O error occurs
	 */
	public static CSVParser parse(final String string, final CSVFormat format) throws IOException {
		Assertions.notNull(string, "string");
		Assertions.notNull(format, "format");

		// wrap the in-memory text so it can be consumed like any other character source
		final Reader source = new StringReader(string);
		return new CSVParser(source, format);
	}

	// Note: the objects shared to reduce garbage (recordList, reusableToken) are declared with the instance fields.

	/**
	 * Creates a parser for the given URL.
	 *
	 * <p>
	 * The URL's stream is opened by this method. If the parser cannot be created (for example because reading the
	 * header fails or the header contains duplicate names), the stream is closed again before the exception is
	 * propagated, so the connection is not leaked. On success the stream is owned by the returned parser; call
	 * {@link #close()} on the parser to release it, in particular if you do not read all records.
	 * </p>
	 *
	 * @param url
	 *            a URL. Must not be null.
	 * @param charset
	 *            the charset for the resource. Must not be null.
	 * @param format
	 *            the CSVFormat used for CSV parsing. Must not be null.
	 * @return a new parser
	 * @throws IllegalArgumentException
	 *             If the parameters of the format are inconsistent or if either url, charset or format are null.
	 * @throws IOException
	 *             If an I/O error occurs
	 */
	public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException {
		Assertions.notNull(url, "url");
		Assertions.notNull(charset, "charset");
		Assertions.notNull(format, "format");

		final InputStream inputStream = url.openStream();
		try {
			return new CSVParser(new InputStreamReader(inputStream, charset), format);
		} catch (final IOException | RuntimeException e) {
			// The caller never receives a parser it could close, so release the stream here.
			try {
				inputStream.close();
			} catch (final IOException closeFailure) {
				e.addSuppressed(closeFailure);
			}
			throw e;
		}
	}

	/** The format consulted for trimming, null-string handling, trailing delimiters and header creation. */
	private final CSVFormat format;

	/** A mapping of column names to column indices; {@code null} when the format defines no header. */
	private final Map<String, Integer> headerMap;

	/** The header names in column order, cached to avoid re-computing them; {@code null} when there is no header map. */
	private final List<String> headerNames;

	/** The lexer that supplies the tokens from which records are assembled. */
	private final Lexer lexer;

	/** The single iterator instance handed out by every call to {@link #iterator()}. */
	private final CSVRecordIterator csvRecordIterator;

	/** A value buffer for nextRecord(). Cleared at the start of each record, grows as necessary and is reused. */
	private final List<String> recordList = new ArrayList<>();

	/**
	 * The number assigned to the most recently created record. It is incremented just before a new record is
	 * created, so it is initialised to one less than the number of the first record.
	 */
	private long recordNumber;

	/**
	 * Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination
	 * with {@link #recordNumber}. It is added to the lexer's character position to compute a record's start position.
	 */
	private final long characterOffset;

	/** Token instance that is reset and refilled for every lexer call instead of allocating a new one. */
	private final Token reusableToken = new Token();

	/**
	 * Creates a parser that reads from the given reader using the given {@link CSVFormat}, starting at character
	 * offset 0 with the first record numbered 1.
	 *
	 * <p>
	 * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
	 * unless you close the {@code reader}.
	 * </p>
	 *
	 * @param reader
	 *            a Reader containing CSV-formatted input. Must not be null.
	 * @param format
	 *            the CSVFormat used for CSV parsing. Must not be null.
	 * @throws IllegalArgumentException
	 *             If the parameters of the format are inconsistent or if either reader or format are null.
	 * @throws IOException
	 *             If there is a problem reading the header or skipping the first record
	 */
	public CSVParser(final Reader reader, final CSVFormat format) throws IOException {
		// delegate with the defaults: no character offset, record numbering starts at 1
		this(reader, format, 0L, 1L);
	}

	/**
	 * Customized CSV parser using the given {@link CSVFormat}
	 *
	 * <p>
	 * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
	 * unless you close the {@code reader}.
	 * </p>
	 *
	 * <p>
	 * The header map is built during construction, which may already consume the first record of the input (either
	 * to read the header names from it or to skip it).
	 * </p>
	 *
	 * @param reader
	 *            a Reader containing CSV-formatted input. Must not be null.
	 * @param format
	 *            the CSVFormat used for CSV parsing. Must not be null.
	 * @param characterOffset
	 *            Lexer offset when the parser does not start parsing at the beginning of the source.
	 * @param recordNumber
	 *            The next record number to assign
	 * @throws IllegalArgumentException
	 *             If the parameters of the format are inconsistent or if either reader or format are null.
	 * @throws IOException
	 *             If there is a problem reading the header or skipping the first record
	 * @since 1.1
	 */
	@SuppressWarnings("resource")
	public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber)
			throws IOException {
		Assertions.notNull(reader, "reader");
		Assertions.notNull(format, "format");

		this.format = format;
		this.lexer = new Lexer(format, new ExtendedBufferedReader(reader));
		this.csvRecordIterator = new CSVRecordIterator();
		// Statement order matters from here on: createHeaderMap() may call nextRecord(), which needs the format
		// and lexer set above and which increments this.recordNumber as a side effect.
		this.headerMap = createHeaderMap(); // 1st
		this.headerNames = createHeaderNames(this.headerMap); // 2nd: derived from the header map
		this.characterOffset = characterOffset;
		// Assigned after header processing so that a consumed header record does not count; nextRecord()
		// pre-increments the counter, hence the "- 1".
		this.recordNumber = recordNumber - 1;
	}

	/**
	 * Appends the content of the reusable token to the record buffer as the next value.
	 *
	 * <p>
	 * The value is trimmed if the format requests trimming. An empty value that ends a record is dropped when the
	 * format declares a trailing delimiter. A value equal to the format's null string is stored as {@code null}.
	 * </p>
	 *
	 * @param lastRecord
	 *            {@code true} if the token is the last value of the current record
	 */
	private void addRecordValue(final boolean lastRecord) {
		String value = this.reusableToken.content.toString();
		if (this.format.getTrim()) {
			value = value.trim();
		}
		if (lastRecord && value.isEmpty() && this.format.getTrailingDelimiter()) {
			// the empty value is an artifact of the trailing delimiter, not real data
			return;
		}
		if (value.equals(this.format.getNullString())) {
			this.recordList.add(null);
		} else {
			this.recordList.add(value);
		}
	}

	/**
	 * Closes the underlying lexer, if there is one.
	 *
	 * @throws IOException
	 *             If an I/O error occurs
	 */
	@Override
	public void close() throws IOException {
		if (this.lexer == null) {
			return;
		}
		this.lexer.close();
	}

	/**
	 * Creates an empty map suitable for holding header names and their column indices.
	 *
	 * @return a {@link TreeMap} with case-insensitive key ordering if the format ignores header case, otherwise a
	 *         {@link LinkedHashMap}
	 */
	private Map<String, Integer> createEmptyHeaderMap() {
		if (this.format.getIgnoreHeaderCase()) {
			return new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
		}
		return new LinkedHashMap<>();
	}

	/**
	 * Builds the column-name to column-index mapping if the format defines a header.
	 *
	 * <p>
	 * If the format's header is an empty array, the names are taken from the next record of the input. Otherwise
	 * the format's header is used as is, and the next record is skipped when the format asks for that. Null names
	 * are not mapped. A repeated name is rejected unless it is blank and the format allows missing column names.
	 * </p>
	 *
	 * @return the header map, or {@code null} if the format has no header.
	 * @throws IOException if there is a problem reading the header or skipping the first record
	 * @throws IllegalArgumentException if the header contains a duplicate name that is not permitted
	 */
	private Map<String, Integer> createHeaderMap() throws IOException {
		final String[] declaredHeader = this.format.getHeader();
		if (declaredHeader == null) {
			return null;
		}

		final Map<String, Integer> nameToIndex = createEmptyHeaderMap();
		final String[] columnNames;
		if (declaredHeader.length == 0) {
			// the names come from the first line of the input
			final CSVRecord firstRecord = this.nextRecord();
			columnNames = firstRecord == null ? null : firstRecord.values();
		} else {
			if (this.format.getSkipHeaderRecord()) {
				this.nextRecord();
			}
			columnNames = declaredHeader;
		}
		if (columnNames == null) {
			// input was empty: there is a header map, but it has no entries
			return nameToIndex;
		}

		for (int column = 0; column < columnNames.length; column++) {
			final String name = columnNames[column];
			if (name == null) {
				continue;
			}
			if (nameToIndex.containsKey(name)) {
				final boolean blank = name.trim().isEmpty();
				if (!(blank && this.format.getAllowMissingColumnNames())) {
					throw new IllegalArgumentException("The header contains a duplicate name: \"" + name
							+ "\" in " + Arrays.toString(columnNames));
				}
			}
			nameToIndex.put(name, Integer.valueOf(column));
		}
		return nameToIndex;
	}

	/**
	 * Derives the list of header names, ordered by column index, from the given header map.
	 *
	 * @param headerMap
	 *            the column-name to column-index mapping, may be {@code null}
	 * @return an unmodifiable list of the map's keys sorted by their mapped index, or {@code null} if
	 *         {@code headerMap} is {@code null}
	 */
	private List<String> createHeaderNames(final Map<String, Integer> headerMap) {
		if (headerMap == null) {
			return null;
		}
		final List<Map.Entry<String, Integer>> byColumn = new ArrayList<>(headerMap.entrySet());
		byColumn.sort(Map.Entry.comparingByValue());
		final List<String> names = new ArrayList<>(byColumn.size());
		for (final Map.Entry<String, Integer> entry : byColumn) {
			names.add(entry.getKey());
		}
		return Collections.unmodifiableList(names);
	}

	/**
	 * Returns the current line number as reported by the lexer.
	 *
	 * <p>
	 * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
	 * the record number.
	 * </p>
	 *
	 * @return current line number
	 */
	public long getCurrentLineNumber() {
		final long lineNumber = lexer.getCurrentLineNumber();
		return lineNumber;
	}

	/**
	 * Gets the first end-of-line string encountered, as reported by the lexer.
	 *
	 * @return the first end-of-line string
	 * @since 1.5
	 */
	public String getFirstEndOfLine() {
		final String firstEol = this.lexer.getFirstEol();
		return firstEol;
	}

	/**
	 * Returns a copy of the header map.
	 * <p>
	 * The map keys are column names. The map values are 0-based indices. The copy is a map of the same kind as
	 * the internal one, so changes to it do not affect the parser.
	 * </p>
	 * @return a copy of the header map, or {@code null} if the parser has no header map.
	 */
	public Map<String, Integer> getHeaderMap() {
		if (this.headerMap != null) {
			final Map<String, Integer> copy = createEmptyHeaderMap();
			copy.putAll(this.headerMap);
			return copy;
		}
		return null;
	}

	/**
	 * Returns the internal header map itself, without copying it.
	 *
	 * @return the header map, or {@code null} if the parser has no header map.
	 */
	Map<String, Integer> getHeaderMapRaw() {
		final Map<String, Integer> raw = headerMap;
		return raw;
	}

	/**
	 * Returns a read-only list of header names that iterates in column order.
	 *
	 * @return read-only list of header names that iterates in column order, or {@code null} if the parser has no
	 *         header map.
	 * @since 1.7
	 */
	public List<String> getHeaderNames() {
		final List<String> names = this.headerNames;
		return names;
	}

	/**
	 * Returns the current record number, i.e. the number assigned to the most recently parsed record.
	 *
	 * <p>
	 * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
	 * the line number.
	 * </p>
	 *
	 * @return current record number
	 */
	public long getRecordNumber() {
		final long current = recordNumber;
		return current;
	}

	/**
	 * Reads all remaining records of the CSV input into a list of {@link CSVRecord CSVRecords}.
	 *
	 * <p>
	 * The returned content starts at the current parse-position in the stream; records that were parsed earlier
	 * are not included.
	 * </p>
	 *
	 * @return list of {@link CSVRecord CSVRecords}, may be empty
	 * @throws IOException
	 *             on parse error or input read-failure
	 */
	public List<CSVRecord> getRecords() throws IOException {
		final List<CSVRecord> parsed = new ArrayList<>();
		for (CSVRecord row = this.nextRecord(); row != null; row = this.nextRecord()) {
			parsed.add(row);
		}
		return parsed;
	}

	/**
	 * Gets whether this parser is closed, as reported by the lexer.
	 *
	 * @return whether this parser is closed.
	 */
	public boolean isClosed() {
		final boolean closed = lexer.isClosed();
		return closed;
	}

	/**
	 * Returns an iterator on the records.
	 *
	 * <p>
	 * Every call returns the same iterator instance, which continues from the parser's current position.
	 * </p>
	 * <p>
	 * An {@link IOException} caught during the iteration is re-thrown as an
	 * {@link IllegalStateException}.
	 * </p>
	 * <p>
	 * If the parser is closed a call to {@link Iterator#next()} will throw a
	 * {@link NoSuchElementException}.
	 * </p>
	 *
	 * @return the parser's record iterator
	 */
	@Override
	public Iterator<CSVRecord> iterator() {
		final Iterator<CSVRecord> shared = this.csvRecordIterator;
		return shared;
	}

	/**
	 * Parses the next record from the current point in the stream.
	 *
	 * <p>
	 * Tokens are pulled from the lexer until one ends the record or the input. Comment tokens seen on the way are
	 * joined with line feeds and attached to the resulting record.
	 * </p>
	 *
	 * @return the next record, or {@code null} if no values were read before the end of the stream was reached
	 * @throws IOException
	 *             on parse error or input read-failure
	 */
	CSVRecord nextRecord() throws IOException {
		this.recordList.clear();
		StringBuilder commentText = null;
		final long recordStart = lexer.getCharacterPosition() + this.characterOffset;
		do {
			this.reusableToken.reset();
			this.lexer.nextToken(this.reusableToken);
			switch (this.reusableToken.type) {
				case TOKEN:
					// a value inside the record; more tokens follow
					this.addRecordValue(false);
					break;
				case EORECORD:
					// the value that closes the record
					this.addRecordValue(true);
					break;
				case EOF:
					// only keep the token content if the lexer marked it as ready
					if (this.reusableToken.isReady) {
						this.addRecordValue(true);
					}
					break;
				case INVALID:
					throw new IOException("(line " + this.getCurrentLineNumber() + ") invalid parse sequence");
				case COMMENT:
					// collect comment lines, separated by line feeds
					if (commentText != null) {
						commentText.append(Constants.LF);
					} else {
						commentText = new StringBuilder();
					}
					commentText.append(this.reusableToken.content);
					// pretend a plain token was read so that the loop fetches another one
					this.reusableToken.type = TOKEN;
					break;
				default:
					throw new IllegalStateException("Unexpected Token type: " + this.reusableToken.type);
			}
		} while (this.reusableToken.type == TOKEN);

		if (this.recordList.isEmpty()) {
			return null;
		}
		this.recordNumber++;
		final String comment = commentText == null ? null : commentText.toString();
		final String[] values = this.recordList.toArray(new String[this.recordList.size()]);
		return new CSVRecord(this, values, comment, this.recordNumber, recordStart);
	}

}